# David Morley
# BYU Vol 1
# 11/29/18
import IPython
from matplotlib import pyplot as plt
import numpy as np
from scipy.io import wavfile
from scipy import fftpack
import time
from scipy import signal
import imageio
# Configure matplotlib defaults: sharper inline figures with a wide,
# short aspect ratio suited to plotting waveforms (adjustable).
plt.rcParams.update({
    "figure.dpi": 300,
    "figure.figsize": (12, 3),
})
class SoundWave(object):
    """A class for working with digital audio signals.

    Attributes:
        rate (int): The sample rate (samples per second) of the sound.
        samples ((n,) ndarray): The array of samples.
    """

    # Problem 1.1
    def __init__(self, rate, samples):
        """Set the SoundWave class attributes.

        Parameters:
            rate (int): The sample rate of the sound.
            samples ((n,) ndarray): NumPy array of samples.
        """
        # Store each input as an attribute.
        self.rate = rate
        self.samples = samples

    # Problems 1.1 and 1.7
    def plot(self, dft=False):
        """Plot the graph of the sound wave (time versus amplitude).

        Parameters:
            dft (bool): If True, also plot (in a first subplot) the
                magnitudes of the frequencies present in the samples,
                with the x-axis in Hertz; only the first half of the
                (symmetric) spectrum is shown.
        """
        n = len(self.samples)
        if dft:
            plt.subplot(121)                # First subplot: the spectrum.
            # Convert DFT bin indices to frequencies in Hertz: v_k = k * rate / n.
            v = np.arange(n) * self.rate / n
            frequencies = fftpack.fft(self.samples)
            # Only the first half of the spectrum is informative for real input.
            plt.plot(v[:n//2], abs(frequencies[:n//2] * self.rate / n))
            plt.title("Frequencies vs Magnitude")
            plt.xlabel("Frequency (Hz)")    # Label axes.
            plt.ylabel("Magnitude")
            plt.xlim(left=0)
            plt.subplot(122)                # Second subplot: the waveform.
        # Total duration of the sound in seconds.
        num_seconds = n / self.rate
        y = np.linspace(0, num_seconds, n)
        plt.plot(y, self.samples, lw=.5)    # Plot samples against time.
        plt.title("Samples vs Time")
        plt.ylim(-32768, 32767)             # Full int16 range.
        plt.xlabel("Time (Seconds)")        # Label axes.
        plt.ylabel("Samples")
        plt.show()

    # Problem 1.2
    def export(self, filename, force=False):
        """Generate a wav file from the sample rate and samples.

        If the samples are not of type np.int16 (or if force is True),
        scale them into the int16 range before exporting.

        Parameters:
            filename (str): The name of the wav file to export the sound to.
            force (bool): If True, rescale the samples even when they are
                already of type np.int16.
        """
        # BUG FIX: the original tested `type(samples)`, which referenced an
        # undefined name (and type() never equals np.int16 anyway); check
        # the dtype of the stored sample array instead.
        if self.samples.dtype != np.int16 or force:
            # Discard any imaginary component left over from an inverse FFT.
            real = np.real(self.samples)
            peak = np.abs(real).max() if len(real) else 0
            # Guard against division by zero on silent (all-zero) input.
            if peak == 0:
                scaled = np.zeros_like(real, dtype=np.int16)
            else:
                # Scale so the loudest sample maps to the int16 maximum.
                scaled = np.int16(real * 32767. / peak)
            wavfile.write(filename, self.rate, scaled)
        else:
            wavfile.write(filename, self.rate, self.samples)

    # Problem 1.4
    def __add__(self, other):
        """Combine the samples from two SoundWave objects.

        Parameters:
            other (SoundWave): An object containing the samples to add
                to the samples contained in this object.
        Returns:
            (SoundWave): A new SoundWave instance with the combined samples.
        Raises:
            ValueError: if the two sample arrays are not the same length,
                or if the sample rates do not match.
        """
        if self.rate != other.rate:
            # BUG FIX: the original message incorrectly mentioned lengths.
            raise ValueError("sample rates do not match")
        # BUG FIX: the docstring promises a length check, but the original
        # never performed one; add it with an accurate message.
        if len(self.samples) != len(other.samples):
            raise ValueError("sample arrays are not the same length")
        # Pointwise sum of the two waves, returned as a new object.
        return SoundWave(self.rate, self.samples + other.samples)

    # Problem 1.4
    def __rshift__(self, other):
        """Concatenate the samples from two SoundWave objects.

        Parameters:
            other (SoundWave): An object containing the samples to concatenate
                to the samples contained in this object.
        Returns:
            (SoundWave): A new SoundWave instance with the concatenated samples.
        Raises:
            ValueError: if the two sample rates are not equal.
        """
        if self.rate != other.rate:
            # BUG FIX: the original message incorrectly mentioned lengths.
            raise ValueError("sample rates are not equal")
        # Concatenate the samples to generate the new sound.
        concat = np.concatenate((self.samples, other.samples))
        return SoundWave(self.rate, concat)

    # Problem 2.1
    def __mul__(self, other):
        """Convolve the samples from two SoundWave objects using circular convolution.

        Parameters:
            other (SoundWave): An object containing the samples to convolve
                with the samples contained in this object.
        Returns:
            (SoundWave): A new SoundWave instance with the convolved samples.
        Raises:
            ValueError: if the two sample rates are not equal.
        """
        if self.rate != other.rate:
            # BUG FIX: the original message incorrectly mentioned lengths.
            raise ValueError("sample rates are not equal")
        n, m = len(self.samples), len(other.samples)
        # BUG FIX: the original zero-padded by overwriting self.samples or
        # other.samples, permanently mutating the operands as a side effect;
        # pad local copies instead.
        size = max(n, m)
        a = np.concatenate([self.samples, np.zeros(size - n)])
        b = np.concatenate([other.samples, np.zeros(size - m)])
        # Circular convolution via the convolution theorem: ifft(fft(a)*fft(b)).
        convolve = fftpack.ifft(fftpack.fft(a) * fftpack.fft(b)).real
        return SoundWave(self.rate, convolve)

    # Problem 2.2
    def __pow__(self, other):
        """Convolve the samples from two SoundWave objects using linear convolution.

        Parameters:
            other (SoundWave): An object containing the samples to convolve
                with the samples contained in this object.
        Returns:
            (SoundWave): A new SoundWave instance with the convolved samples.
        Raises:
            ValueError: if the two sample rates are not equal.
        """
        if self.rate != other.rate:
            # BUG FIX: the original message incorrectly mentioned lengths.
            raise ValueError("sample rates are not equal")
        n = len(self.samples)
        m = len(other.samples)
        # Smallest a such that 2**a >= n + m - 1 (power-of-2 length for the FFT).
        a = int(np.ceil(np.log2(n + m - 1)))
        # Zero-pad both inputs to length 2**a.
        selfNew = np.zeros(2**a)
        otherNew = np.zeros(2**a)
        selfNew[:n] = self.samples
        otherNew[:m] = other.samples
        # Linear convolution = first n+m-1 entries of the circular convolution
        # of the padded arrays.
        convolve = fftpack.ifft(fftpack.fft(selfNew)*fftpack.fft(otherNew))[:n+m-1].real
        return SoundWave(self.rate, convolve)

    # Problem 2.4
    def clean(self, low_freq, high_freq):
        """Remove a range of frequencies from the samples using the DFT.

        Parameters:
            low_freq (float): Lower bound of the frequency range to zero out.
            high_freq (float): Upper bound of the frequency range to zero out.
        Returns:
            (SoundWave): A new SoundWave with the frequency band removed.
        """
        n = len(self.samples)
        # Convert the frequency bounds (Hz) to DFT bin indices: k = f * n / rate.
        k_high = int(round(high_freq * n / self.rate))
        k_low = int(round(low_freq * n / self.rate))
        dft = fftpack.fft(self.samples)
        # Zero the band and its conjugate mirror so the result stays (nearly) real.
        # NOTE(review): the mirrored slice is shifted by one bin relative to the
        # exact conjugate band (bins n-k for k in [k_low, k_high)); negligible
        # for wide bands, but confirm intent.
        dft[k_low:k_high] = 0
        dft[n-k_high:n-k_low] = 0
        # BUG FIX: take the real part — fftpack.ifft returns a complex array
        # even when the imaginary part is numerically zero.
        return SoundWave(self.rate, fftpack.ifft(dft).real)
# Problem: use SoundWave.__init__(), SoundWave.plot(), and
# scipy.io.wavfile.read() to plot tada.wav.
# (BUG FIX: the notebook-export markdown was fused into a code line,
# which is a syntax error in plain Python; restore it as a comment.)
# Read in the soundfile, saving the rate and samples
rate, samples = wavfile.read("tada.wav")
# Generate a SoundWave class object
tada = SoundWave(rate, samples)
# Use the plot function
tada.plot()
# Problem: use the export() method to create two new files containing the same
# sound as tada.wav — one without scaling, and one with scaling (force=True) —
# then use IPython.display.Audio() to embed all three versions in the notebook.
# (BUG FIX: restore fused markdown as a comment.)
# Export the unscaled file
tada.export("tada_unscaled.wav")
# Export the scaled file
tada.export("tada_scaled.wav", force=True)
# Embed each version
IPython.display.Audio("tada.wav")
IPython.display.Audio("tada_unscaled.wav")
IPython.display.Audio("tada_scaled.wav")
# Problem: use generate_note() to create an A tone that lasts for two seconds
# and embed it in the notebook.
# (BUG FIX: restore the markdown fused onto the def line as a comment.)
def generate_note(frequency, duration):
    """Generate an instance of the SoundWave class corresponding to
    the desired soundwave. Uses a sample rate of 44100 Hz.

    Parameters:
        frequency (float): The frequency of the desired sound.
        duration (float): The length of the desired sound in seconds.

    Returns:
        sound (SoundWave): An instance of the SoundWave class.
    """
    rate = 44100
    # Sample times over [0, duration]. BUG FIX: np.linspace requires an
    # integer sample count; rate * duration is a float whenever duration is,
    # so truncate explicitly.
    t = np.linspace(0, duration, int(rate * duration))
    # A pure sine tone at the requested frequency.
    return SoundWave(rate, np.sin(2 * np.pi * frequency * t))
# Build a two-second concert A (440 Hz) and keep only its sample array
A = generate_note(440, 2).samples
# Embed the tone in the notebook at the matching sample rate
IPython.display.Audio(rate=44100, data=A)
# Problem: demonstrate SoundWave.__add__() (chord) and SoundWave.__rshift__()
# (arpeggio). (BUG FIX: restore fused markdown as a comment.)
# Generate notes for chord
A = generate_note(440, 3)
C = generate_note(523.25, 3)
E = generate_note(659.25, 3)
# Add notes to make chord
Aminor = A + E + C
# Play computed product
IPython.display.Audio(rate=44100, data=Aminor.samples)
# Generate notes for arpeggio
A = generate_note(440, 1)
C = generate_note(523.25, 1)
E = generate_note(659.25, 1)
# Right shift notes to make arpeggio
arpeggio = A >> C >> E
# Play computed product
IPython.display.Audio(rate=44100, data=arpeggio.samples)
# Problem: implement simple_dft() with the formula for c_k, and use
# np.allclose() to check that simple_dft() and scipy.fftpack.fft() give the
# same result (after scaling). (BUG FIX: restore fused markdown as a comment.)
def simple_dft(samples):
    """Compute the DFT of an array of samples.

    Parameters:
        samples ((n,) ndarray): an array of samples.

    Returns:
        ((n,) ndarray): The DFT of the given array, scaled by 1/n
            (so n * simple_dft(x) matches scipy.fftpack.fft(x)).
    """
    n = len(samples)
    # Column vector of indices 0, 1, ..., n-1.
    k = np.arange(n).reshape(n, 1)
    # W[j, k] = exp(-2*pi*i*j*k / n) via array broadcasting.
    W = np.exp(-2 * np.pi * 1j / n * k @ k.T)
    # Matrix-vector product computes every c_k at once; divide by n to scale.
    return np.array(W @ samples / n)
# Size of the test vector
n = 4
# Random input on which to compare the two implementations
test_array = np.random.random(n)
# simple_dft() scales by 1/n, so rescale before comparing against scipy
myC = 4 * simple_dft(test_array)
scipyC = fftpack.fft(test_array)
# The two transforms should agree to floating-point precision
np.allclose(myC, scipyC)
# Problem: implement simple_fft(); time simple_dft(), simple_fft(), and
# scipy.fftpack.fft() and print the runtimes; use np.allclose() to check that
# simple_fft() and scipy.fftpack.fft() give the same result (after scaling).
# (BUG FIX: restore the markdown fused onto the def line as a comment.)
def simple_fft(samples, threshold=1):
    """Compute the DFT using the FFT algorithm.

    Parameters:
        samples ((n,) ndarray): an array of samples. n should be a
            power of 2 so the recursive halves split evenly.
        threshold (int): when a subarray of samples has fewer
            elements than this integer, use simple_dft() to
            compute the DFT of that subarray.

    Returns:
        ((n,) ndarray): The DFT of the given array, scaled by 1/n.
    """
    n = len(samples)
    if n <= threshold:
        # Base case: fall back to the naive O(n^2) DFT.
        return simple_dft(samples)
    else:
        # BUG FIX: the original recursed without forwarding `threshold`, so a
        # caller-supplied threshold was silently ignored below the top level.
        f_even = simple_fft(samples[::2], threshold)
        f_odd = simple_fft(samples[1::2], threshold)
        # Twiddle factors w^k = exp(-2*pi*i*k/n).
        w = np.exp((-2j * np.pi/n) * np.arange(n))
        # Danielson-Lanczos combination of the half-size transforms.
        first_sum = f_even + w[:n//2] * f_odd
        second_sum = f_even + w[n//2:] * f_odd
        # The factor 1/2 preserves the 1/n scaling used by simple_dft().
        return .5 * np.concatenate([first_sum, second_sum])
# Number of samples (a power of 2 so the FFT halves split evenly)
n = 8192
# Random test input
test_array = np.random.random(n)
# Time the naive O(n^2) DFT (rescaled by n to match scipy's convention)
start = time.time()
dft = n * simple_dft(test_array)
dft_time = time.time() - start
# Time the recursive FFT (same rescaling)
start = time.time()
fft = n * simple_fft(test_array)
fft_time = time.time() - start
# Time scipy's C implementation
start = time.time()
scipy = fftpack.fft(test_array)
scipy_time = time.time() - start
# Report all three runtimes, then verify the FFT matches scipy
print(f"simple_dft: {dft_time} seconds\nsimple_fft: {fft_time} seconds\nscipy.fftpack.fft: {scipy_time} seconds")
np.allclose(fft, scipy)
# Problem: modify SoundWave.plot() to accept a boolean; when True, plot (in a
# new subplot) the magnitudes of the frequencies in the DFT of the samples,
# showing only the first half with the x-axis in Hertz.
# (BUG FIX: restore fused markdown as a comment.)
A.plot(True)
Aminor.plot(True)
# Problem: use the DFT to determine the individual notes present in
# mystery_chord.wav. (BUG FIX: restore bare markdown lines as comments.)
# Read in the soundfile, saving the rate and samples
rate, samples = wavfile.read("mystery_chord.wav")
# Save the size of the sample
n = len(samples)
# Calculate the absolute value of the DFT, keeping only the first half
dft = abs(fftpack.fft(samples))[:n//2]
# Sort the DFT in descending order, keeping the indices of the largest bins
indices = np.argsort(dft)[::-1]
k = indices[:4]
# Convert bin indices to frequencies in hertz: v = k * rate / n
frequencies = (k * rate / n)
frequencies
# The notes are identified by matching these frequencies to pitches.
# Problem: use SoundWave.__mul__() (circular convolution) to mix tada.wav
# with white noise, then embed the result in the notebook.
# (BUG FIX: restore the markdown fused onto the code line as a comment.)
rate = tada.rate
# Create 2 seconds of white noise at the given rate
white_noise = np.random.randint(-32767, 32767, rate*2, dtype=np.int16)
white = SoundWave(rate, white_noise)
# Convolve white noise and tada sound
white_convolve = white * tada
IPython.display.Audio(rate=rate, data=white_convolve.samples)
# Append the circular convolution to itself
full_track = white_convolve >> white_convolve
IPython.display.Audio(rate=rate, data=full_track.samples)
# Problem: use SoundWave.__pow__() (linear convolution) and
# scipy.signal.fftconvolve() on CGC.wav and GCG.wav, time both, and verify
# that the two results sound the same.
# (BUG FIX: restore fused markdown as a comment; the `%time` IPython magics
# are invalid in plain Python, so time explicitly with time.time().)
# Read each sound file once and build SoundWave objects
cgc_rate, cgc_samples = wavfile.read("CGC.wav")
CGC = SoundWave(cgc_rate, cgc_samples)
gcg_rate, gcg_samples = wavfile.read("GCG.wav")
GCG = SoundWave(gcg_rate, gcg_samples)
# Compute and time the linear convolution via SoundWave.__pow__()
start = time.time()
cgc_convo = CGC ** GCG
print(f"SoundWave.__pow__: {time.time() - start} seconds")
# Compute and time the linear convolution via scipy
start = time.time()
gcg_convo = signal.fftconvolve(CGC.samples, GCG.samples)
print(f"scipy.signal.fftconvolve: {time.time() - start} seconds")
# Embed CGC sound. BUG FIX: the original passed the stale module-level
# `rate` (from the white-noise cell) instead of each file's own rate.
IPython.display.Audio(rate=CGC.rate, data=CGC.samples)
# Embed GCG sound
IPython.display.Audio(rate=GCG.rate, data=GCG.samples)
# Embed the convolution of CGC and GCG
IPython.display.Audio(rate=CGC.rate, data=cgc_convo.samples)
# Problem: use SoundWave.__pow__() or scipy.signal.fftconvolve() to compute
# the linear convolution of chopin.wav and balloon.wav, and embed the two
# original sounds and their convolution in the notebook.
# (BUG FIX: restore bare markdown lines as comments.)
# Embed chopin sound
chopin_rate, chopin_samples = wavfile.read("chopin.wav")
IPython.display.Audio("chopin.wav")
# Embed balloon sound
balloon_rate, balloon_samples = wavfile.read("balloon.wav")
IPython.display.Audio("balloon.wav")
# Compute and embed the convolution of balloon and chopin
convo = signal.fftconvolve(chopin_samples, balloon_samples)
IPython.display.Audio(rate=chopin_rate, data=convo)
# Problem: use SoundWave.clean() to remove 1250-2600 Hz from noisy1.wav and an
# appropriate band from noisy2.wav; embed the original and cleaned versions.
# (BUG FIX: restore fused markdown as a comment.)
# Embed first sound file
noisy1_rate, noisy1_samples = wavfile.read("noisy1.wav")
noisy1 = SoundWave(noisy1_rate, noisy1_samples)
IPython.display.Audio("noisy1.wav")
# Clean first sound file
clean1 = noisy1.clean(1250, 2600)
IPython.display.Audio(rate=noisy1.rate, data=clean1.samples)
# Embed second sound file
noisy2_rate, noisy2_samples = wavfile.read("noisy2.wav")
noisy2 = SoundWave(noisy2_rate, noisy2_samples)
IPython.display.Audio("noisy2.wav")
# Inspect the spectrum to locate the bad frequency band
noisy2.plot(True)
# Clean second sound file
clean2 = noisy2.clean(1300, 4300)
# BUG FIX: play the second cleaned sound at noisy2's rate, not noisy1's.
IPython.display.Audio(rate=noisy2.rate, data=clean2.samples)
# Problem: clean vuvuzela.wav by filtering the bad frequencies out of the
# left and right channels individually.
# (BUG FIX: restore fused markdown as a comment; also read the stereo file
# once instead of four times.)
vuvu_rate, vuvu_samples = wavfile.read("vuvuzela.wav")
vuvuzelaA = SoundWave(vuvu_rate, vuvu_samples[:, 0])
vuvuzelaB = SoundWave(vuvu_rate, vuvu_samples[:, 1])
# Embed original sound (both channels stacked)
IPython.display.Audio(rate=vuvuzelaA.rate, data=np.array([vuvuzelaA.samples, vuvuzelaB.samples]))
# Inspect the spectra of both channels
vuvuzelaA.plot(True)
vuvuzelaB.plot(True)
# Clean the vuvuzela band out of each channel
cleanA = vuvuzelaA.clean(200, 500)
cleanB = vuvuzelaB.clean(200, 500)
# Recombine the channels into a stereo array
cleanStereo = np.vstack((cleanA.samples, cleanB.samples))
IPython.display.Audio(rate=cleanA.rate, data=cleanStereo)
# Problem: clean license_plate.png so that the year printed on the sticker in
# the bottom right corner of the plate is legible.
# (BUG FIX: restore fused/bare markdown lines as comments.)
# Read in the license plate image
im = imageio.imread("license_plate.png")
# Take the 2-D FFT of the image
im_dft = fftpack.fft2(im)
# Plot the log-magnitude of the FFT next to the original
plt.subplot(121)
plt.imshow(np.log(np.abs(im_dft)), cmap="gray")
plt.subplot(122)
plt.imshow(im, cmap="gray")
plt.show()
# Create a copy of the FFT so the original transform is preserved
adj_dft = im_dft.copy()
# Replace the two noise spikes with the mean of their surrounding regions
mean_dft = np.mean(adj_dft[20:40, 90:110])
adj_dft[31:39, 98:105] = mean_dft
mean_dft = np.mean(adj_dft[170:190, 320:340])
adj_dft[178:185, 325:338] = mean_dft
# Invert the adjusted transform and show the cleaned image
plt.imshow(fftpack.ifft2(adj_dft).real, cmap="gray")
# The year on the sticker can now be read from the cleaned image.